import json
import math
import re

from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from scrapy.utils.project import get_project_settings

from sdszfcg.rules import *
from zc_core.dao.supplier_dao import SupplierDao
from zc_core.spiders.base import BaseSpider
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request


class SkuSpider(BaseSpider):
    """Crawl every supplier's catalog tree and the paged goods list of each level-3 catalog.

    Request flow:
        ``start_requests``  -> one catalog-tree request per supplier
        ``parse_supplier``  -> page 1 of the goods list for each level-3 catalog
        ``parse_sku``       -> emits the parsed rows; page 1 also schedules pages 2..N
    """
    name = 'full_supplier'

    # Supplier catalog-tree endpoint; placeholder: supplier id.
    supplier_catalog_url = 'http://ggzyjyzx.shandong.gov.cn/wssc/shopstore/getCatalogTree?sid={}&type=1'
    # Supplier goods-list endpoint; placeholders: supplier id, page number, rows per page, catalog id.
    supplier_sku_url = 'http://ggzyjyzx.shandong.gov.cn/wssc/shopstore/{}/goods?page={}&rows={}&type=1&productName=&cid={}&orderby=&isAsc='

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider.

        Args:
            batchNo: batch identifier forwarded to ``BaseSpider``
                (presumably what populates ``self.batch_no`` -- confirm in BaseSpider).
        """
        super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        settings = get_project_settings()
        self.ladders = settings.get('PRICE_LADDER', [])
        # Page size: sent as the ``rows`` query parameter and used to derive the page count.
        self.rows = 16

    def start_requests(self):
        """Yield one catalog-tree request for every supplier of the current batch."""
        supplier_pool = SupplierDao().get_batch_supplier_list(
            self.batch_no,
            fields={'_id': 1, 'name': 1, 'supplierLink': 1},
        )
        self.logger.info("供应商量 -> %s", len(supplier_pool))
        for supplier in supplier_pool:
            supplier_id = supplier.get('_id')
            yield Request(
                url=self.supplier_catalog_url.format(supplier_id),
                method='GET',
                meta={
                    'reqType': 'catalog',
                    'batchNo': self.batch_no,
                    'supplierId': supplier_id,
                },
                callback=self.parse_supplier,
                errback=self.error_back,
                priority=100,
            )

    def _sku_list_request(self, supplier_id, catalog_id, catalog_name, page, req_type):
        """Build the request for one page of a supplier's goods list within one catalog.

        Keeping this in a single place guarantees the ``page`` in the URL and the
        ``page`` in ``meta`` always agree, and that every page carries the catalog
        fields ``parse_sku`` reads back.
        """
        return Request(
            url=self.supplier_sku_url.format(supplier_id, page, self.rows, catalog_id),
            method='GET',
            meta={
                'reqType': req_type,
                'batchNo': self.batch_no,
                'catalogId': catalog_id,
                'catalogName': catalog_name,
                'supplierId': supplier_id,
                'page': page,
            },
            callback=self.parse_sku,
            errback=self.error_back,
            priority=100,
        )

    def parse_supplier(self, response):
        """Handle a catalog-tree response.

        Yields the parsed catalogs as a ``Box('catalog', ...)`` and then the
        first goods-list page for each catalog whose ``level`` is 3.
        """
        supplier_id = response.meta.get('supplierId')
        # Parse the supplier's catalogs.
        cats = parse_supplier_catalog(response)

        if not cats:
            # ``cats`` may be None as well as empty, so len() must not be applied to it directly.
            self.logger.error('品类空: [%s] skuId -> [%s]', len(cats or []), supplier_id)
            return

        yield Box('catalog', self.batch_no, cats)
        self.logger.info('品类 len=%s supplierId=%s', len(cats), supplier_id)
        for cat in cats:
            if cat.get('level') != 3:
                continue
            # NOTE(review): page 1 is tagged reqType='catalog' while pages 2+ use 'item';
            # kept as in the original because the consumer of reqType is not visible here.
            yield self._sku_list_request(
                supplier_id, cat.get('catalogId'), cat.get('catalogName'), 1, 'catalog')

    def parse_sku(self, response):
        """Handle one goods-list page.

        Yields the parsed rows as a ``Box('item', ...)``. When the response is
        page 1, the total row count is read from the JSON body and requests for
        pages 2..N are yielded so they can be fetched concurrently.
        """
        meta = response.meta
        cur_page = meta.get('page')
        supplier_id = meta.get('supplierId')
        catalog_id = meta.get('catalogId')
        catalog_name = meta.get('catalogName')
        # Parse the goods list; inside a method body the bare name ``parse_sku``
        # resolves to the module-level function, not to this method.
        rows = parse_sku(response)

        if not rows:
            self.logger.error(
                '空清单: supplierId=%s catalogId=%s curPage=%s', supplier_id, catalog_id, cur_page)
            return

        yield Box('item', self.batch_no, rows)
        self.logger.info(
            '清单 supplierId=%s catalogId=%s curPage=%s cnt=%s',
            supplier_id, catalog_id, cur_page, len(rows))

        # Only the first page fans out; later pages just emit their rows.
        if cur_page != 1:
            return

        total = json.loads(response.text)['page']['total']
        total_page = math.ceil(int(total) / self.rows)
        self.logger.info(
            '总清单: supplierId=%s catalogId=%s totalPage=%s', supplier_id, catalog_id, total_page)
        for page in range(2, total_page + 1):
            # The URL must carry ``page`` itself: re-requesting page 1 would repeat
            # the already-seen URL and never reach the remaining pages.
            yield self._sku_list_request(supplier_id, catalog_id, catalog_name, page, 'item')
